## Setup
Install the required packages and load the libraries. If you have not yet installed the tidyverse and lubridate packages, uncomment the two install lines below and run them first.
#install.packages("tidyverse")
#install.packages("lubridate")
library(tidyverse)
library(lubridate)
# Download the Wichita stop records (CSV) and parse them into a tibble.
wichita <- read_csv(
  "https://datajournalism.tech/wp-content/uploads/2019/10/wichita.csv"
)

# Number of people in each racial group, keyed by the same labels used in
# wichita$subject_race so the two tables can be joined.
# NOTE(review): presumably Wichita resident counts; the source of these
# figures is not shown here -- confirm.
population <- tribble(
  ~subject_race,            ~num_people,
  "asian/pacific islander", 19272,
  "black",                  42679,
  "hispanic",               63659,
  "other/unknown",          13351,
  "white",                  246343
)

# Latitude/longitude on which the interactive map is centred.
center_lat <- 37.692963
center_lng <- -97.323992
Explore the dataset, which is provided by the Stanford Open Policing Project at Stanford University. You can learn more on the project's website: https://openpolicing.stanford.edu.
# Open the spreadsheet-style data viewer. View() needs an interactive session
# (for example RStudio), so it is skipped when the script is knitted or run
# with Rscript instead of stopping the whole script.
if (interactive()) {
  View(wichita)
}
# Print the structure of the data: each variable's type and first few values.
str(wichita)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 57750 obs. of 22 variables:
## $ X1 : num 1 2 3 4 5 6 7 8 9 10 ...
## $ raw_row_number : chr "923578" "923657" "912091" "923680" ...
## $ date : Date, format: "2016-01-01" "2016-01-01" ...
## $ time : 'hms' num 18:00:00 18:08:00 18:11:00 18:13:00 ...
## ..- attr(*, "units")= chr "secs"
## $ location : chr "N WEST ST, KS, 67205" "8000 W 13TH ST N, WICHITA, KS, 67212" "500 S LIMUEL ST, WICHITA, KS, 67235" "7600 W 21ST ST N, WICHITA, KS, 67205" ...
## $ lat : num 37.7 37.7 37.7 37.7 37.7 ...
## $ lng : num -97.4 -97.4 -97.5 -97.4 -97.4 ...
## $ subject_age : num 16 44 20 21 28 27 15 20 23 NA ...
## $ subject_race : chr "white" "white" "white" "hispanic" ...
## $ subject_sex : chr "female" "male" "male" "female" ...
## $ type : chr "vehicular" "vehicular" "vehicular" "vehicular" ...
## $ disposition : chr "DISMISSED" "GUILTY (IVR)" "DISMISSED WITH PREJUDICE; DISMISSED WITH PREJUDICE" "GUILTY" ...
## $ violation : chr "RUN STOP SIGN" "SPEED OVER LIMIT" "DUI; INATTENTIVE DRIVING" "SPEED OVER LIMIT" ...
## $ citation_issued : logi TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ outcome : chr "citation" "citation" "citation" "citation" ...
## $ posted_speed : num NA 40 NA 40 40 40 NA NA NA NA ...
## $ vehicle_color : chr "BURGUNDY OR MAROON" "\"ALUMINUM, SILVER\"" "WHITE" "\"ALUMINUM, SILVER\"" ...
## $ vehicle_make : chr "JEEP (1989 TO PRESENT)" "HYUNDAI" "HONDA" "TOYOTA" ...
## $ vehicle_model : chr NA "TUCSON" NA NA ...
## $ vehicle_year : num 2008 NA NA NA NA ...
## $ raw_defendant_race : chr "W" "W" "W" "W" ...
## $ raw_defendant_ethnicity: chr "N" "N" "N" "H" ...
## - attr(*, "spec")=
## .. cols(
## .. X1 = col_double(),
## .. raw_row_number = col_character(),
## .. date = col_date(format = ""),
## .. time = col_time(format = ""),
## .. location = col_character(),
## .. lat = col_double(),
## .. lng = col_double(),
## .. subject_age = col_double(),
## .. subject_race = col_character(),
## .. subject_sex = col_character(),
## .. type = col_character(),
## .. disposition = col_character(),
## .. violation = col_character(),
## .. citation_issued = col_logical(),
## .. outcome = col_character(),
## .. posted_speed = col_double(),
## .. vehicle_color = col_character(),
## .. vehicle_make = col_character(),
## .. vehicle_model = col_character(),
## .. vehicle_year = col_double(),
## .. raw_defendant_race = col_character(),
## .. raw_defendant_ethnicity = col_character()
## .. )
# Show a short summary of the values in each column, one column per row.
wichita %>% glimpse()
## Observations: 57,750
## Variables: 22
## $ X1 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,...
## $ raw_row_number <chr> "923578", "923657", "912091", "923680"...
## $ date <date> 2016-01-01, 2016-01-01, 2016-01-01, 2...
## $ time <time> 18:00:00, 18:08:00, 18:11:00, 18:13:0...
## $ location <chr> "N WEST ST, KS, 67205", "8000 W 13TH S...
## $ lat <dbl> 37.74143, 37.70880, 37.67482, 37.72402...
## $ lng <dbl> -97.38976, -97.44059, -97.48999, -97.4...
## $ subject_age <dbl> 16, 44, 20, 21, 28, 27, 15, 20, 23, NA...
## $ subject_race <chr> "white", "white", "white", "hispanic",...
## $ subject_sex <chr> "female", "male", "male", "female", "m...
## $ type <chr> "vehicular", "vehicular", "vehicular",...
## $ disposition <chr> "DISMISSED", "GUILTY (IVR)", "DISMISSE...
## $ violation <chr> "RUN STOP SIGN", "SPEED OVER LIMIT", "...
## $ citation_issued <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR...
## $ outcome <chr> "citation", "citation", "citation", "c...
## $ posted_speed <dbl> NA, 40, NA, 40, 40, 40, NA, NA, NA, NA...
## $ vehicle_color <chr> "BURGUNDY OR MAROON", "\"ALUMINUM, SIL...
## $ vehicle_make <chr> "JEEP (1989 TO PRESENT)", "HYUNDAI", "...
## $ vehicle_model <chr> NA, "TUCSON", NA, NA, "SILVERADO", "NE...
## $ vehicle_year <dbl> 2008, NA, NA, NA, NA, NA, NA, 2008, 20...
## $ raw_defendant_race <chr> "W", "W", "W", "W", "W", "W", "W", "W"...
## $ raw_defendant_ethnicity <chr> "N", "N", "N", "H", "H", "N", "H", "H"...
# List the column headers of the data table.
names(wichita)
## [1] "X1" "raw_row_number"
## [3] "date" "time"
## [5] "location" "lat"
## [7] "lng" "subject_age"
## [9] "subject_race" "subject_sex"
## [11] "type" "disposition"
## [13] "violation" "citation_issued"
## [15] "outcome" "posted_speed"
## [17] "vehicle_color" "vehicle_make"
## [19] "vehicle_model" "vehicle_year"
## [21] "raw_defendant_race" "raw_defendant_ethnicity"
After viewing the dataset, you can analyze it to see the min, max, mean, median and other values for each variable. These are called descriptive statistics.
# Descriptive statistics (min, quartiles, mean, max, NA counts) per column.
wichita %>% summary()
## X1 raw_row_number date time
## Min. : 1 Length:57750 Min. :2016-01-01 Length:57750
## 1st Qu.:14438 Class :character 1st Qu.:2016-03-16 Class1:hms
## Median :28876 Mode :character Median :2016-05-29 Class2:difftime
## Mean :28876 Mean :2016-06-10 Mode :numeric
## 3rd Qu.:43313 3rd Qu.:2016-08-31
## Max. :57750 Max. :2016-12-31
##
## location lat lng subject_age
## Length:57750 Min. :37.47 Min. :-101.36 Min. :11.00
## Class :character 1st Qu.:37.67 1st Qu.: -97.37 1st Qu.:24.00
## Mode :character Median :37.69 Median : -97.34 Median :33.00
## Mean :37.69 Mean : -97.33 Mean :36.71
## 3rd Qu.:37.70 3rd Qu.: -97.28 3rd Qu.:48.00
## Max. :38.48 Max. : -96.75 Max. :99.00
## NA's :1167 NA's :1167 NA's :10128
## subject_race subject_sex type
## Length:57750 Length:57750 Length:57750
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## disposition violation citation_issued outcome
## Length:57750 Length:57750 Mode:logical Length:57750
## Class :character Class :character TRUE:57750 Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## posted_speed vehicle_color vehicle_make vehicle_model
## Min. : 20.00 Length:57750 Length:57750 Length:57750
## 1st Qu.: 30.00 Class :character Class :character Class :character
## Median : 40.00 Mode :character Mode :character Mode :character
## Mean : 39.93
## 3rd Qu.: 40.00
## Max. :304.00
## NA's :35149
## vehicle_year raw_defendant_race raw_defendant_ethnicity
## Min. :1962 Length:57750 Length:57750
## 1st Qu.:2001 Class :character Class :character
## Median :2005 Mode :character Mode :character
## Mean :2005
## 3rd Qu.:2009
## Max. :2999
## NA's :43236
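The group_by verb sorts the rows into groups that share the same value, here the race of the person stopped. The summarize verb is usually paired with group_by: it collapses each group into a single row, for example by counting the number of stops in that group. We then join those counts to the population table and divide the number of stops by the number of people to get a stop rate for each group.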
# Number of recorded stops for each value of subject_race (column `value`).
race <- wichita %>%
  count(subject_race, name = "value")

# Attach the stop counts to the population table and compute, for each race,
# the number of stops divided by the number of people.
stop_rate <- population %>%
  left_join(race, by = "subject_race") %>%
  mutate(stop_rate = value / num_people)
## Data Visualization
### Bar Chart
# Build a horizontal bar chart of the stop rate (stops / population) by race.
# reorder() sorts the bars by stop_rate; coord_flip() turns them sideways so
# the race labels are easy to read.
bar <- ggplot(
  stop_rate,
  aes(x = reorder(subject_race, stop_rate), y = stop_rate)
) +
  # geom_col() plots the y values as given (same as geom_bar(stat = "identity")).
  geom_col(fill = "red") +
  geom_hline(yintercept = 0) +
  labs(
    title = "Drivers stopped by police in Wichita Kansas (2016)",
    subtitle = "African American drivers got stopped more than white drivers",
    # Explicit axis titles: without them the axis would be labelled with the
    # raw expression "reorder(subject_race, stop_rate)".
    x = "Race of driver",
    y = "Stops per resident"
  ) +
  coord_flip()

# A large scipen penalty makes R print axis numbers in fixed notation rather
# than scientific notation. This is a session-wide option and must be set
# before the plot is drawn.
options(scipen = 10000)

# Draw the chart.
bar
You can export the graphics by running the names of the objects in the Console and clicking the Export button under the Plots tab. Alternatively, you can use one of the following commands to export them:
# Save the bar chart to disk. Passing plot = bar makes the export independent
# of which plot happened to be displayed last; without it, ggsave() saves the
# most recently displayed plot.
ggsave("bar.png", plot = bar, width = 40, height = 20, units = "cm")
# NOTE: writing SVG files may require the svglite package to be installed.
ggsave("bar.svg", plot = bar, width = 40, height = 20, units = "cm")
# To export a different chart (for example a line chart), pass that chart's
# object and a new file name, e.g. ggsave("line.png", plot = <your plot>).
###Interactive Map with leaflet
#install.packages("httpuv")
#install.packages("leaflet")
library(httpuv)
library(leaflet)
# Colour palette function: maps each subject_race label to a marker colour.
# ordered = TRUE keeps the domain in the order written here, so the colours
# pair up positionally (white -> "white", black -> "black",
# asian/pacific islander -> "gold", hispanic -> "darkorange",
# other/unknown -> "gray53").
# NOTE: this reuses the name `race`, replacing the stop-count table created
# earlier in the script.
race <- colorFactor(
  c("white", "black", "gold", "darkorange", "gray53"),
  domain = c(
    "white", "black", "asian/pacific islander", "hispanic", "other/unknown"
  ),
  ordered = TRUE
)

# Interactive map with one small circle per stop, coloured by race.
# Rows without coordinates are removed explicitly up front; otherwise leaflet
# drops them itself and emits a "missing or invalid lat/lon values" warning.
map <- wichita %>%
  filter(!is.na(lat), !is.na(lng)) %>%
  leaflet() %>%
  addProviderTiles(providers$OpenStreetMap) %>%
  setView(lng = center_lng, lat = center_lat, zoom = 10) %>%
  addCircleMarkers(
    ~lng, ~lat,
    # A formula popup is evaluated on the map's own (filtered) data, so every
    # popup text stays aligned with its marker.
    popup = ~paste("This is a", subject_race, "and", subject_sex, "driver."),
    weight = 2,
    radius = 2,
    color = ~race(subject_race),
    stroke = FALSE,
    fillOpacity = 1
  )

# Display the map.
map